
import os
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
import cv2
import numpy as np
from timeit import default_timer as timer
from datetime import timedelta
from IPython.display import Image, display
%matplotlib inline
plt.rcParams.update({'font.size': 12})
from mask_utils import rle_to_mask
%reload_ext autoreload
%autoreload 2
# Build the list of training image filenames found on disk.
train_images_list = pd.DataFrame(
    {"ImageId": [entry.name
                 for entry in os.scandir(os.path.join('data', 'train_images'))
                 if entry.is_file()]}
)
print("{:,}".format(len(train_images_list)) + " training images")

# Load the defect metadata: one row per (image, defect class) pair.
train_csv = pd.read_csv(os.path.join('data', 'train.csv')).set_index(['ImageId'])
train_csv.head()

# FIX: the reported row count was off by one (`shape[0] + 1`; shape[0] already
# excludes the CSV header) and "accompanying" was misspelled.
print("The metadata file accompanying the " + "{:,}".format(len(train_images_list)) +
      " training images " +
      "contains some " + "{:,}".format(train_csv.shape[0]) + " metadata rows.")
Let's look at how these two collections match each other:
# Outer-join the directory listing with the metadata so that both images
# without a metadata row and rows without an image file are kept visible.
train_data = pd.merge(
    pd.DataFrame(train_images_list),
    train_csv,
    on='ImageId',
    how='outer',
)
train_data.head(10)
print("{:,}".format(train_data.shape[0]) + " total rows")
Some images have no reported defect, while others have more than one type of defect (column 'ClassId').
# Show the first rows*columns defective training images, with each defect
# mask burned into the blue channel of its image.
columns = 2
rows = 10
fig = plt.figure(figsize=(20, columns * rows + 2))
# keep only the rows that actually report a defect (non-null RLE)
filtered_train_data = train_data[train_data['EncodedPixels'].notnull()]
for i in range(columns * rows):
    fn = filtered_train_data['ImageId'].iloc[i]
    c = filtered_train_data['ClassId'].iloc[i]
    fig.add_subplot(rows, columns, i + 1).set_title(fn + " ClassId=" + str(c))
    img = cv2.imread(os.path.join('data', 'train_images', fn))
    #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    mask = rle_to_mask(filtered_train_data['EncodedPixels'].iloc[i], 256, 1600)
    img[mask == 1, 2] = 255  # max value on the blue channel
    plt.imshow(img)
plt.show()
# Drop 'ClassId' by aggregating: collect every RLE string of an image into
# one list, keyed by 'ImageId'.
grouped_train_data = train_data.groupby('ImageId')['EncodedPixels'].agg(list)
grouped_train_data.head()
We can now easily identify which images have more than one RLE (i.e. which images have reported defects of 2 or more different classes):
# Count the images whose defects span 2, 3 or 4 different classes
# (so that we can later merge their masks into a single one per 'ImageId').
# FIX: Series.iteritems() was removed in pandas 2.0 -- use items() instead.
count2 = 0
count3 = 0
count4 = 0
for image_id, rles in grouped_train_data.items():
    if len(rles) == 2:
        count2 += 1
    elif len(rles) == 3:
        count3 += 1
    elif len(rles) == 4:
        count4 += 1
print("{:,}".format(count2) + " images with defects of 2 different classes")
print("{:,}".format(count3) + " images with defects of 3 different classes")
print("{:,}".format(count4) + " images with defects of 4 different classes")
As an illustration, let's look at an image with defects of 2 different classes and color-code those defect classes:
# Illustrate an image with defects of 2 different classes by overlaying each
# class's mask onto a different color channel.
imageId = '0025bde0c.jpg'
len(grouped_train_data[imageId])
img = cv2.imread(os.path.join('data', 'train_images', imageId))
for color_channel, rle in enumerate(grouped_train_data[imageId]):
    mask = rle_to_mask(rle, 256, 1600)
    # NOTE(review): channel is set to 1 (near-black) here while other cells
    # use 255 -- confirm the low value is intentional for this rendering
    img[mask == 1, color_channel] = 1
fig = plt.figure(figsize=(20, 4))
plt.imshow(img)
plt.show()
We will handle these cases in the upcoming section of the workbook.
# Pre-process the defective training images: copy each image into
# data/train_images/preprocessed/ and write one merged binary defect mask per
# image ("<name>_mask.jpg"), combining multi-class masks into a single one.
path = os.path.join('data', 'train_images', 'preprocessed')
if not os.path.exists(path):
    os.mkdir(path)
start = timer()
count = 0
# FIX: Series.iteritems() was removed in pandas 2.0 -- use items() instead.
for image_id, rles in grouped_train_data.items():
    # only consider training images with at least 1 reported defect
    if not pd.isnull(rles)[0]:
        if not os.path.exists(os.path.join(path, image_id)):
            # normalize the input image
            image = cv2.imread(os.path.join('data', 'train_images', image_id))
            norm_image = image  # image.astype(np.float32) / 255. # moved standardization inside custom_data_generator
            cv2.imwrite(os.path.join(path, image_id), norm_image)
        name_suffix = image_id.partition(".jpg")[0]
        if not os.path.exists(os.path.join(path, name_suffix + "_mask.jpg")):
            # compute the defect mask
            if len(rles) == 1:
                # case of defects of a single class
                mask_image = rle_to_mask(rles[0], 256, 1600)
            else:
                # defects of more than one class: merge the masks into one
                mask = np.zeros((256, 1600))
                for i, rle in enumerate(rles):
                    mask_i = rle_to_mask(rle, 256, 1600)
                    mask = mask + mask_i
                mask_image = (mask > 0).astype('uint8')
            # NOTE(review): the mask is written with pixel values 0/1 into a
            # lossy JPEG; downstream cells re-binarize with a ~0.5 threshold.
            # Confirm compression noise cannot flip pixels across it (saving
            # mask*255, or PNG, would be safer but changes downstream reads).
            cv2.imwrite(os.path.join(path, name_suffix + "_mask.jpg"), mask_image)
        count += 1
print("{:,}".format(count) + " training images with reported defect(s) available")
print(timedelta(seconds=timer() - start))
We can confirm below that, when applicable, the masks-merge operation was applied and was successful, by looking at our previous example again. The entire defect area is indeed covered by one single mask now:
# Sanity check on the earlier two-class example: its whole defective area
# must now be covered by the single merged mask produced above.
imageId = '0025bde0c.jpg'
img = cv2.imread(os.path.join('data', 'train_images', imageId))
mask_path = os.path.join(path, imageId.partition(".jpg")[0] + "_mask.jpg")
mask = cv2.threshold(
    cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE),
    .5, 1, cv2.THRESH_BINARY,
)[1]
print(mask_path)
print(np.amax(mask))
img[mask == 1, 2] = 255  # max value on the blue channel
fig = plt.figure(figsize=(20, 4))
plt.imshow(img)
plt.show()
For Deep Learning models to perform best, we can either increase the depth of their inner layers and/or use more data at training time. The purpose of data augmentation is to increase the amount of training data.
As can be observed on the below example, from one pair (image, defect mask), we will generate 3 additional new pairs. That way, our training sample will be quadrupled in size.
We will perform combinations of flipping and mirroring on each original training record. Let's look at what the result would be on one such training record (top-left quadrant of the picture below):
# Demonstrate the 3 augmentations (vertical flip, mirror, both) on one
# (image, mask) pair; each defect mask is burned into the blue channel.
imageId = 'fffe98443.jpg'  #'0002cc93b.jpg'
img = cv2.imread(os.path.join('data', 'train_images', imageId))
mask = rle_to_mask(
    filtered_train_data[filtered_train_data.ImageId == imageId]['EncodedPixels'].iloc[0],
    256, 1600)
img_flip_ud = cv2.flip(img, 0)       # flip vertically
mask_flip_ud = cv2.flip(mask, 0)
img_flip_lr = cv2.flip(img, 1)       # flip horizontally
mask_flip_lr = cv2.flip(mask, 1)
img_flip_ud_lr = cv2.flip(img, -1)   # flip vertically and horizontally
mask_flip_ud_lr = cv2.flip(mask, -1)
fig = plt.figure(figsize=(20, 4))
# one quadrant per variant; defect area at max value on the blue channel
variants = [
    ("img", img, mask),
    ("img_flip", img_flip_ud, mask_flip_ud),
    ("img_mirror", img_flip_lr, mask_flip_lr),
    ("img_flip_mirror", img_flip_ud_lr, mask_flip_ud_lr),
]
for plot_idx, (title, variant_img, variant_mask) in enumerate(variants):
    fig.add_subplot(2, 2, plot_idx + 1).set_title(title)
    variant_img[variant_mask == 1, 2] = 255
    plt.imshow(variant_img)
plt.show()
Let us now do that on the entire original training dataset:
# Augment the pre-processed dataset: for every defective training pair, write
# vertically-flipped ("_flip"), mirrored ("_mirror") and flipped+mirrored
# ("_flip_mirror") copies of both the image and its mask, skipping files that
# already exist so the cell can be re-run cheaply.
start = timer()
path = os.path.join('data', 'train_images', 'preprocessed')
count = 0
# FIX: Series.iteritems() was removed in pandas 2.0 -- use items() instead.
for image_id, rles in grouped_train_data.items():
    # only consider training images with at least 1 reported defect
    if not pd.isnull(rles)[0]:
        name_suffix = image_id.partition(".jpg")[0]
        # lazily loaded at most once per image, shared by the 3 variants
        image = None
        mask_image = None
        if not os.path.exists(os.path.join(path, name_suffix + "_flip.jpg")):
            if image is None:
                image = cv2.imread(os.path.join(path, image_id))
            image_flip = cv2.flip(image, 0)  # flip vertically
            cv2.imwrite(os.path.join(path, name_suffix + "_flip.jpg"), image_flip)
        if not os.path.exists(os.path.join(path, name_suffix + "_mask_flip.jpg")):
            if mask_image is None:
                mask_image = cv2.imread(os.path.join(path, name_suffix + "_mask.jpg"), cv2.IMREAD_GRAYSCALE)
            mask_image_flip = cv2.flip(mask_image, 0)  # flip vertically
            # FIX: was written to the undefined name 'dest_path' (NameError)
            cv2.imwrite(os.path.join(path, name_suffix + "_mask_flip.jpg"), mask_image_flip)
        if not os.path.exists(os.path.join(path, name_suffix + "_mirror.jpg")):
            if image is None:
                # FIX: was 'path + img[0]' (missing separator) -- join properly
                image = cv2.imread(os.path.join(path, image_id))
            image_mirror = cv2.flip(image, 1)  # flip horizontally
            cv2.imwrite(os.path.join(path, name_suffix + "_mirror.jpg"), image_mirror)
        if not os.path.exists(os.path.join(path, name_suffix + "_mask_mirror.jpg")):
            if mask_image is None:
                mask_image = cv2.imread(os.path.join(path, name_suffix + "_mask.jpg"), cv2.IMREAD_GRAYSCALE)
            mask_image_mirror = cv2.flip(mask_image, 1)  # flip horizontally
            cv2.imwrite(os.path.join(path, name_suffix + "_mask_mirror.jpg"), mask_image_mirror)
        if not os.path.exists(os.path.join(path, name_suffix + "_flip_mirror.jpg")):
            if image is None:
                # FIX: was 'path + img[0]' (missing separator) -- join properly
                image = cv2.imread(os.path.join(path, image_id))
            image_flip_mirror = cv2.flip(image, -1)  # flip vertically and horizontally
            cv2.imwrite(os.path.join(path, name_suffix + "_flip_mirror.jpg"), image_flip_mirror)
        if not os.path.exists(os.path.join(path, name_suffix + "_mask_flip_mirror.jpg")):
            if mask_image is None:
                mask_image = cv2.imread(os.path.join(path, name_suffix + "_mask.jpg"), cv2.IMREAD_GRAYSCALE)
            mask_image_flip_mirror = cv2.flip(mask_image, -1)  # flip vertically and horizontally
            cv2.imwrite(os.path.join(path, name_suffix + "_mask_flip_mirror.jpg"), mask_image_flip_mirror)
        count += 4
print("{:,}".format(count) + " training records available")
print(timedelta(seconds=timer() - start))
We then end up with a brand-new, 4-times-larger training dataset:
# Pair every augmented image with its mask.
# FIX: the two os.scandir() listings come back in arbitrary filesystem order,
# so zipping them positionally could mis-pair images and masks. Sorting both
# lists aligns them: inserting "_mask" right after the image id preserves the
# relative order of the 4 variants ("", "_flip", "_flip_mirror", "_mirror").
input_images = sorted(entry.name
                      for entry in os.scandir(os.path.join('data', 'train_images', 'preprocessed'))
                      if entry.is_file() and "_mask" not in entry.name)
#print( "{:,}".format( len(input_images) ) + " training images available" )
output_masks = sorted(entry.name
                      for entry in os.scandir(os.path.join('data', 'train_images', 'preprocessed'))
                      if entry.is_file() and "_mask" in entry.name)
#print( "{:,}".format( len(output_masks) ) + " training masks available" )
df = pd.DataFrame({"ImageId": input_images, "MaskId": output_masks})
print("{:,}".format(df.shape[0]) + " training records available")
df.head(10)

For more detailed views of the architecture of the model employed, please refer to the APPENDICES at the bottom of this Notebook.
# Silence TensorFlow 1.x deprecation warnings routed through tf.python.util.
from tensorflow.python.util import deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False  # private TF flag -- may break across TF versions
import keras
from keras import backend as K
from keras.layers import Input, Conv2D
from keras.losses import binary_crossentropy
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras import Model
from segmentation_models import Unet
# project-local helpers: Dice metric and the (image, mask) batch generator
from model_utils import dice_coef, custom_data_generator
import dill as pickle
We'll use the Dice similarity coefficient as our performance metric (@see model_utils.dice_coef).
# U-Net with a ResNet-34 encoder pre-trained on ImageNet; one sigmoid output
# channel = per-pixel defect probability over a 128x800 input.
base_model = Unet(
backbone_name='resnet34',
input_shape=(128, 800, 3),
encoder_weights='imagenet',
classes=1,
activation='sigmoid'
)
# The steel pictures are effectively gray-scale: accept 1 channel and learn a
# 1x1 convolution mapping it to the 3 channels the pre-trained encoder expects.
inp = Input(shape=(128, 800, 1)) # gray-scaled input
l1 = Conv2D(3, (1, 1))(inp) # map 1 channel to 3 channels (from original U-Net)
out = base_model(l1)
model = Model(inp, out, name=base_model.name)
# binary_crossentropy loss; Dice coefficient tracked as the quality metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[dice_coef])
# Train/validation split: first 80% of the records train, the rest validate.
BATCH_SIZE = 32
idx = int(0.80 * df.shape[0])
# FIX: stray '+' inside the printed message
print("{:,}".format(idx) + ' training records ; ' + "{:,}".format(df.shape[0] - idx) + ' validation records.')
train_batches = custom_data_generator(df.iloc[:idx], batch_size=BATCH_SIZE, shuffle=True)
valid_batches = custom_data_generator(df.iloc[idx:], batch_size=BATCH_SIZE)
PATIENCE = 10
# FIX: '<statement> ; if ...' on a single line is a SyntaxError -- a compound
# statement cannot follow a semicolon; split onto separate lines.
path = os.path.join('model')
if not os.path.exists(path):
    os.mkdir(path)
# Train the model and persist both the weights and the training history.
# NOTE(review): get_callback is neither defined nor imported in this notebook
# -- presumably it lives in model_utils; confirm and import it explicitly.
start = timer()
history = model.fit_generator(
    train_batches,
    validation_data=valid_batches,
    epochs=40,
    callbacks=get_callback(PATIENCE),
    verbose=1)
# FIX: the closing parenthesis of model.save(...) was missing (SyntaxError).
model.save(os.path.join('model', 'my_model.h5'))
# FIX: the history was pickled into the current directory but is reloaded
# later from the 'model' directory -- write it where it is read back.
with open(os.path.join('model', 'train_hstory.pickle'), 'wb') as file_pi:
    pickle.dump(history.history, file_pi)
print(timedelta(seconds=timer() - start))
The full trace can be found as an APPENDIX at the bottom of this Notebook.
import tensorflow as tf
print('- Tensorflow ' + tf.__version__)
from tensorflow.python.util import deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
# Reload the trained model; dice_coef is a custom metric, so it must be
# registered via custom_objects for deserialization to succeed.
model = tf.keras.models.load_model(
    os.path.join('model', 'my_model.h5'),
    custom_objects={'dice_coef': dice_coef},
)
print('- trained model loaded from h5df local file')
# Reload the training history captured at fit time.
history_file = os.path.join('model', 'train_hstory.pickle')
with open(history_file, 'rb') as f:
    history_reloaded = pickle.load(f)
history_df = pd.DataFrame(history_reloaded)
print('- model training history loaded from pickle local file')
# PLOT TRAINING: loss (top) and Dice coefficient (bottom), per epoch.
fig = plt.figure(figsize=(10, 15))

loss_ax = fig.add_subplot(2, 1, 1)
loss_ax.set_title('Training Loss')
plt.plot(history_df.index, history_df['loss'], label='training')
plt.plot(history_df.index, history_df['val_loss'], label='validation')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc="upper right")
plt.ylim(0, .5)

perf_ax = fig.add_subplot(2, 1, 2)
perf_ax.set_title('Training Performance')
plt.plot(history_df.index, history_df['dice_coef'], label='training')
plt.plot(history_df.index, history_df['val_dice_coef'], label='validation')
plt.xlabel('Epoch')
plt.ylabel('Dice_coef')
plt.legend(loc="lower right")

fig.tight_layout()
plt.show()
We can observe that progress on the validation dataset is rather slow and that it quickly plateaus.
import json

# Persist the model in two light-weight parts: the architecture as JSON and
# the weights as HDF5 (smaller than the full .h5 model file).
json_string = model.to_json()
with open(os.path.join('model', 'my_model_archi.json'), 'w', encoding='utf-8') as f:
    f.write(str(json_string))
# save weights as h5df
model.save_weights(os.path.join('model', 'my_model_weights.h5'))
This model can then be reloaded using the following few lines :
# Rebuild the model from its JSON architecture file, then restore weights.
from tensorflow.keras.models import model_from_json

archi_file = os.path.join('model', 'my_model_archi.json')
with open(archi_file, 'r') as f:
    json_string = f.read()
model_reconstructed = model_from_json(json_string)
# load the trained weights from the h5df file
model_reconstructed.load_weights(os.path.join('model', 'my_model_weights.h5'))
This is the approach we use in the Docker solution, which integrates our model through an API layer. This light version of the trained model is available here. Look for it with the defect-api-service source code.
# Visual evaluation of the trained model on one image: ground truth vs
# several renderings of the predicted per-pixel defect probabilities.
from model_utils import get_predicted_defects_mask
from mask_utils import mask_add_pad, mask_to_contour
imageId = '0025bde0c.jpg'
## input image ##
img = cv2.imread( os.path.join('data', 'train_images', imageId) )
if img is None :
    # fall back to the test set when the id is not a training image
    img = cv2.imread( os.path.join('data', 'test_images', imageId) )
## training_mask ##
training_mask = cv2.imread(os.path.join('data', 'train_images', 'preprocessed', imageId.partition(".jpg")[0] + "_mask.jpg")
, cv2.IMREAD_GRAYSCALE)
## predicted defects ##
predicted_mask = get_predicted_defects_mask( model, img, verbose = 1 )
max_pixel_prob = np.amax(predicted_mask)
# '// 0.0001 / 10000' truncates (not rounds) the probability to 4 decimals
print( "highest predicted pixel defect probability : " + '{0:2.2%}'.format( max_pixel_prob // 0.0001 / 10000 ) )
## plots ##
fig = plt.figure(figsize=(20,20))
# 1) ground-truth mask overlaid on the blue channel
fig.add_subplot(5, 1, 1).set_title('Ground truth - training mask')
training_img = img.copy()
if not training_mask is None :
    # NOTE(review): the ground-truth mask is binarized at .5*max_pixel_prob
    # (a value derived from the *prediction*) while the similar cell above
    # uses a fixed .5 -- confirm this is intentional.
    training_mask = cv2.threshold( training_mask, .5*max_pixel_prob, 1, cv2.THRESH_BINARY)[1]
    training_img[ training_mask==1, 2 ] = 255 # max value on the blue channel
plt.imshow(training_img)
# 2) prediction binarized at a permissive .05 threshold, on the green channel
fig.add_subplot(5, 1, 2).set_title('Low threshold predicted mask')
predicted_mask_lowthreshold = cv2.threshold( predicted_mask, .05, 1, cv2.THRESH_BINARY)[1]
lowthreshold_img = img.copy() ; lowthreshold_img[ predicted_mask_lowthreshold==1, 1 ] = 175 # max value on the green channel
plt.imshow(lowthreshold_img)
# 3) raw probabilities as a heatmap with an unevenly divided colormap
ax = fig.add_subplot(5, 1, 3)
ax.set_title('Heatmap predicted mask')
color_map = mpl.colors.LinearSegmentedColormap.from_list(
'unevently divided', [(0, 'gray'), (.05, 'yellow'), (1, 'green')])
im = plt.imshow(predicted_mask, cmap = color_map)
box = ax.get_position()
# place the colorbar just to the right of the heatmap subplot
axColor = plt.axes([box.x0*1.05 + box.width * 1.05, box.y0, 0.01, box.height])
plt.colorbar(im, cax = axColor, orientation="vertical")
# 4) prediction binarized with an automatically chosen (Otsu) threshold
fig.add_subplot(5, 1, 4)
predicted_mask_otsu = (predicted_mask*255).astype('uint8')
(thresh, predicted_mask_otsu) = cv2.threshold(
predicted_mask_otsu, np.amax(predicted_mask_otsu)/2.,
np.amax(predicted_mask_otsu),
cv2.THRESH_BINARY | cv2.THRESH_OTSU
)
otsu_img = img.copy()
otsu_img[ predicted_mask_otsu==np.amax(predicted_mask_otsu), 1 ] = 175 # max value on the green channel
plt.imshow(otsu_img)
#plt.title('OTSU threshold predicted mask - B&W pixel value threshold : ' + "{:10.3f}".format( thresh/255 ) )
plt.title('OTSU threshold predicted mask - Binary pixel value threshold : ' + '{0:2.2%}'.format( thresh/255//0.0001/10000 ) )
# 5) contour of the Otsu mask drawn on the original image
ax = fig.add_subplot(5, 1, 5)
ax.set_title('OTSU threshold predicted contour')
#predicted_contour_otsu = cv2.threshold( predicted_mask, thresh, np.amax(predicted_mask_otsu), cv2.THRESH_BINARY)[1]
predicted_contour_otsu = mask_to_contour( mask_add_pad( predicted_mask_otsu, pad = 3 ) , width = 3 )
lowthreshold_contour_img = img.copy()
lowthreshold_contour_img[ predicted_contour_otsu==1, 1 ] = 255 # max value on the green channel
plt.imshow(lowthreshold_contour_img)
txt = ax.text(
.99, .05,
"highest pixel defect probability : " + '{0:2.2%}'.format( max_pixel_prob // 0.0001 / 10000 ) + "\n" +
'OTSU pixel value threshold : ' + '{0:2.2%}'.format( thresh/255//0.0001/10000 ),
horizontalalignment='right',
verticalalignment='bottom',
fontsize=14, color='#82ff51', weight='bold',
transform=ax.transAxes)
# add a text outline, in case the text sits above a defect (same color)
txt.set_path_effects([PathEffects.withStroke(linewidth=2, foreground='black')])
plt.show()
from mask_utils import prediction_to_json
# Serialize the predicted mask to JSON and print a shortened pretty version.
json_prediction = prediction_to_json(predicted_mask)
json_obj = json.loads(json_prediction)
print("{:,}".format(len(json_obj['contour_pixels'])) + " contour pixels")
# keep only the first 3 contour pixels and elide the rest
json_obj['contour_pixels'] = json_obj['contour_pixels'][0:3] + ['...']
print(json.dumps(json_obj, indent=4))  # prettify (human-readable)

In order to downgrade the input from 3 (color) to 1 channel (gray scale), we did prepend a convolutional layer to the standard U-Net model :
# APPENDIX: textual and graphical summaries of the full model and of the
# sub-model at layer index 2.
model.summary()
print()
submodel = model.layers[2]
print(submodel.name + " =>")
submodel.summary()
from tensorflow.keras.utils import plot_model
plot_model(model, to_file=os.path.join('model', 'my_model.png'))
plot_model(submodel, to_file=os.path.join('model', submodel.name + '.png'))
display(Image(filename=os.path.join('model', 'my_model.png')))
display(Image(filename=os.path.join('model', submodel.name + '.png')))